#!/bin/sh
#
# Runs the OCR pipeline end to end on a single test page and prints the
# beginning and end of the resulting hOCR file.
#
# Inputs:  tests/testpage.png (relative path, so the script is presumably
#          meant to be started from the directory that contains tests/).
# Outputs: a scratch directory ./temp (wiped and recreated on every run)
#          and ./temp.html in the current directory.
#
# Options are enabled with `set` rather than on the shebang line so they
# remain in effect when the script is invoked as `sh <script>`:
#   -e  stop at the first failing command
#   -x  trace each command before it runs
set -ex

# Start from a clean scratch directory holding just the test page.
rm -rf -- temp
mkdir temp
cp tests/testpage.png temp/0001.png

# NOTE: the glob patterns below are single-quoted on purpose, so the shell
# hands them to the tools unexpanded (presumably each tool expands the
# pattern itself -- confirm against the tools' documentation).

# binarization
ocropus-nlbin 'temp/????.png'

# page level segmentation
ocropus-gpageseg 'temp/????.png'

# raw text line recognition
ocropus-lattices --parallel=3 'temp/????/??????.png'

# language model application
ocropus-ngraphs 'temp/????/??????.lattice'

# create hOCR output
ocropus-hocr 'temp/????.png' > temp.html

# Show the first and last 20 lines of the result as a quick sanity check.
head -n 20 temp.html
tail -n 20 temp.html
